knitr::opts_chunk$set(message = FALSE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
Data input and cleaning
# Load the raw bakery sales export and tidy it up:
#   * column names are normalised by janitor::clean_names()
#   * `unit_price` arrives as text with a euro sign and a decimal comma,
#     so both are rewritten before converting to numeric
#   * `article` is copied to a trailing `product_name` column and then
#     removed, and rows whose product name is just "." are dropped
raw_sales <- read_csv("./Data/Bakery_sales.csv")

bakery_df <-
  raw_sales %>%
  janitor::clean_names() %>%
  mutate(
    unit_price = unit_price %>%
      str_remove("€") %>%
      str_replace(",", ".") %>%
      as.numeric(),
    product_name = article,
    # Assigning NULL removes the source column once it has been copied.
    article = NULL
  ) %>%
  filter(product_name != ".")

bakery_df
## # A tibble: 234,000 × 7
## x1 date time ticket_number quantity unit_price product_name
## <dbl> <date> <time> <dbl> <dbl> <dbl> <chr>
## 1 0 2021-01-02 08:38 150040 1 0.9 BAGUETTE
## 2 1 2021-01-02 08:38 150040 3 1.2 PAIN AU CHOCOLAT
## 3 4 2021-01-02 09:14 150041 2 1.2 PAIN AU CHOCOLAT
## 4 5 2021-01-02 09:14 150041 1 1.15 PAIN
## 5 8 2021-01-02 09:25 150042 5 1.2 TRADITIONAL BAGUET…
## 6 11 2021-01-02 09:25 150043 2 0.9 BAGUETTE
## 7 12 2021-01-02 09:25 150043 3 1.1 CROISSANT
## 8 15 2021-01-02 09:27 150044 1 1.05 BANETTE
## 9 18 2021-01-02 09:32 150045 3 1.2 TRADITIONAL BAGUET…
## 10 19 2021-01-02 09:32 150045 6 1.1 CROISSANT
## # … with 233,990 more rows
A pie chart showing each product's share of the sales count (top 10 products)
# Pie chart of the ten products with the most rows in `bakery_df`.
# `n_obs` is the number of rows per product (it does not sum `quantity`).
plot_pie <-
  bakery_df %>%
  # count() + sort = TRUE is the group_by/summarize(n())/arrange(desc()) idiom.
  count(product_name, name = "n_obs", sort = TRUE) %>%
  head(10) %>%
  # NOTE(review): `colors` is passed without a `color` mapping; confirm it has
  # the intended effect on a pie trace.
  plot_ly(
    labels = ~product_name,
    values = ~n_obs,
    type = "pie",
    colors = "viridis"
  ) %>%
  layout(
    # Title typo fixed ("favoriate"); the slices are products, not bakeries.
    title = "Top 10 favorite bakery products",
    # Hide the cartesian axes, which carry no meaning for a pie chart.
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )

plot_pie
A line plot showing the trend in total sales revenue by month
# Line plot of total revenue (quantity * unit_price) per calendar month.
# `date` is already parsed as a Date, so year and month are taken with
# lubridate accessors instead of splitting the date's text on "-".
plot_line <-
  bakery_df %>%
  mutate(
    year = year(date),
    month = month(date),
    rev = quantity * unit_price
  ) %>%
  group_by(year, month) %>%
  # "drop_last" keeps the result grouped by year, which is what summarize()
  # did implicitly before; stating it makes the grouping deliberate.
  summarize(month_rev = sum(rev), .groups = "drop_last") %>%
  # Name the trace type explicitly so plotly does not have to infer it
  # from `mode`.
  plot_ly(
    x = ~month,
    y = ~month_rev,
    type = "scatter",
    mode = "lines+markers",
    alpha = 0.5
  )

plot_line